rm(list = ls())

#setwd("YOUR-WORKING-DIRECTORY")

# install.packages(c("doParallel", "foreach", "magic"))

set.seed(88)

library("doParallel")
library("foreach")
library("magic")
library("MASS")
library("beepr")

stopCluster(cl)
cl <- makeCluster(detectCores() - 2)
registerDoParallel(cl)

source("replication-functions.R")

n.bootstrap <- 5000

corrs <- c(1.0, 0.0, 0.0, 
           0.0, 1.0, 0.0, 
	       0.0, 0.0, 1.0)


pop.data <- generateComparisonData(n = 1e07, corrs = corrs, gammas = rep(0.5, 3), 
	deltas = rep(0.5, 3), n.control.items = 4, p.t = 0.5, n.groups = 5)



# case 2
{ 

	h <- tapply(pop.data$z, pop.data$group, mean)

	direct.1000 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {
      
      sample.ind <- sample(x = nrow(pop.data), size = 1000, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))

	}


	direct.2500 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 2500, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}


	direct.1e04 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 1e04, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}

}


# case 3
{

	corrs <- c(1.0, 0.0, 0.5, 
		       0.0, 1.0, 0.0, 
		       0.5, 0.0, 1.0)

	pop.data <- generateComparisonData(n = 1e07, corrs = corrs, gammas = rep(0.5, 3), 
		deltas = rep(0.5, 3), n.control.items = 4, p.t = 0.5, n.groups = 5)

	h <- tapply(pop.data$z, pop.data$group, mean)

	direct.corr1.1000 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {
      
      sample.ind <- sample(x = nrow(pop.data), size = 1000, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))

	}


	direct.corr1.2500 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 2500, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}


	direct.corr1.1e04 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 1e04, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}

}


# case 4
{ 

	corrs <- c(1.0, 0.0, 0.5, 
		       0.0, 1.0, 0.5, 
		       0.5, 0.5, 1.0)

	pop.data <- generateComparisonData(n = 1e07, corrs = corrs, gammas = rep(0.5, 3), 
		deltas = rep(0.5, 3), n.control.items = 4, p.t = 0.5, n.groups = 5)

	h <- tapply(pop.data$z, pop.data$group, mean)

	direct.corr2.1000 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {
      
      sample.ind <- sample(x = nrow(pop.data), size = 1000, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))

	}


	direct.corr2.2500 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 2500, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}


	direct.corr2.1e04 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 1e04, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}

}


# case 5
{ 

	corrs <- c(1.0, 0.5, 0.5, 
		       0.5, 1.0, 0.5, 
		       0.5, 0.5, 1.0)

	pop.data <- generateComparisonData(n = 1e07, corrs = corrs, gammas = rep(0.5, 3), 
		deltas = rep(0.5, 3), n.control.items = 4, p.t = 0.5, n.groups = 5)

	h <- tapply(pop.data$z, pop.data$group, mean)

    direct.corr3.1000 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {
      
      sample.ind <- sample(x = nrow(pop.data), size = 1000, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))

	}


	direct.corr3.2500 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 2500, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}


	direct.corr3.1e04 <- foreach(i = 1:n.bootstrap, .combine = "rbind") %dopar% {

      sample.ind <- sample(x = nrow(pop.data), size = 1e04, replace = FALSE)
	  iter.fit <- glm(z ~ x1 + x2, data = pop.data[sample.ind, ], family = binomial("logit"))
      c(coef(iter.fit), std.err(iter.fit))
      
	}

}

beep()


finished.direct <- rbind(
	# cbind(overall.1000, n = 1000, case = "overall"), 
	# cbind(overall.2500, n = 2500, case = "overall"), 
    # cbind(overall.1e04, n = 1e04, case = "overall"), 

	cbind(direct.1000, n = 1000, case = "no-strat"), 
	cbind(direct.2500, n = 2500, case = "no-strat"), 
	cbind(direct.1e04, n = 1e04, case = "no-strat"), 

	cbind(direct.corr1.1000, n = 1000, case = "corr1"), 
	cbind(direct.corr1.2500, n = 2500, case = "corr1"), 
	cbind(direct.corr1.1e04, n = 1e04, case = "corr1"), 

	cbind(direct.corr2.1000, n = 1000, case = "corr2"), 
	cbind(direct.corr2.2500, n = 2500, case = "corr2"), 
	cbind(direct.corr2.1e04, n = 1e04, case = "corr2"), 

	cbind(direct.corr3.1000, n = 1000, case = "corr3"), 
	cbind(direct.corr3.2500, n = 2500, case = "corr3"), 
	cbind(direct.corr3.1e04, n = 1e04, case = "corr3")
	)

finished.direct <- as.data.frame(finished.direct)

write.csv(finished.direct, "direct-est.csv", row.names = FALSE)

finished.direct[, 1:6] <- apply(as.matrix(finished.direct[, 1:6]), 2, function(x) as.numeric(as.character(x)))

direct.results <- rbind(colMeans(subset(finished.direct, select = 1:6, subset = n == 1000 & case == "no-strat")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 2500 & case == "no-strat")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 1e04 & case == "no-strat")),

      colMeans(subset(finished.direct, select = 1:6, subset = n == 1000 & case == "corr1")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 2500 & case == "corr1")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 1e04 & case == "corr1")),

      colMeans(subset(finished.direct, select = 1:6, subset = n == 1000 & case == "corr2")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 2500 & case == "corr2")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 1e04 & case == "corr2")),

      colMeans(subset(finished.direct, select = 1:6, subset = n == 1000 & case == "corr3")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 2500 & case == "corr3")),
      colMeans(subset(finished.direct, select = 1:6, subset = n == 1e04 & case == "corr3")))

# See script aux-fig-4-simulations.R for these results
finished <- read.csv("ictreg-fig-1.csv")

noaux.results <- rbind(colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1000 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 2500 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1e04 & case == "subgroup")),

      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1000 & case == "corr1")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 2500 & case == "corr1")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1e04 & case == "corr1")),

      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1000 & case == "corr2")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 2500 & case == "corr2")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1e04 & case == "corr2")),

      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1000 & case == "corr3")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 2500 & case == "corr3")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 0 & n == 1e04 & case == "corr3")))


aux.results <- rbind(colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1000 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 2500 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1e04 & case == "subgroup")),

      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1000 & case == "corr1")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 2500 & case == "corr1")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1e04 & case == "corr1")),

      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1000 & case == "corr2")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 2500 & case == "corr2")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1e04 & case == "corr2")),

      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1000 & case == "corr3")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 2500 & case == "corr3")),
      colMeans(subset(finished, select = c(1:3, 7:9), subset = augmented == 1 & n == 1e04 & case == "corr3")))



eff.table.list <- rbind(

    # scenario 1: n = 2500
        cbind(
    c(direct.results[2, 4], noaux.results[2, 4], aux.results[2, 4]), 
        c(NA, noaux.results[2, 4]/direct.results[2, 4], aux.results[2, 4]/direct.results[2, 4]), 

        c(direct.results[2, 5], noaux.results[2, 5], aux.results[2, 5]), 
        c(NA, noaux.results[2, 5]/direct.results[2, 5], aux.results[2, 5]/direct.results[2, 5]), 
        
        c(direct.results[2, 6], noaux.results[2, 6], aux.results[2, 6]), 
        c(NA, noaux.results[2, 6]/direct.results[2, 6], aux.results[2, 6]/direct.results[2, 6]) 
        ),
    
    # scenario 2: n = 2500
        cbind(
    c(direct.results[5, 4], noaux.results[5, 4], aux.results[5, 4]), 
        c(NA, noaux.results[5, 4]/direct.results[5, 4], aux.results[5, 4]/direct.results[5, 4]), 

        c(direct.results[5, 5], noaux.results[5, 5], aux.results[5, 5]), 
        c(NA, noaux.results[5, 5]/direct.results[5, 5], aux.results[5, 5]/direct.results[5, 5]), 
        
        c(direct.results[5, 6], noaux.results[5, 6], aux.results[5, 6]), 
        c(NA, noaux.results[5, 6]/direct.results[5, 6], aux.results[5, 6]/direct.results[5, 6]) 
        ),

        # scenario 3: n = 2500
        cbind(
    c(direct.results[8, 4], noaux.results[8, 4], aux.results[8, 4]), 
        c(NA, noaux.results[8, 4]/direct.results[8, 4], aux.results[8, 4]/direct.results[8, 4]), 

        c(direct.results[8, 5], noaux.results[8, 5], aux.results[8, 5]), 
        c(NA, noaux.results[8, 5]/direct.results[8, 5], aux.results[8, 5]/direct.results[8, 5]), 
        
        c(direct.results[8, 6], noaux.results[8, 6], aux.results[8, 6]), 
        c(NA, noaux.results[8, 6]/direct.results[8, 6], aux.results[8, 6]/direct.results[8, 6]) 
        ),

        # scenario 4: n = 2500
        cbind(
    c(direct.results[11, 4], noaux.results[11, 4], aux.results[11, 4]), 
        c(NA, noaux.results[11, 4]/direct.results[11, 4], aux.results[11, 4]/direct.results[11, 4]), 

        c(direct.results[11, 5], noaux.results[11, 5], aux.results[11, 5]), 
        c(NA, noaux.results[11, 5]/direct.results[11, 5], aux.results[11, 5]/direct.results[11, 5]), 
        
        c(direct.results[11, 6], noaux.results[11, 6], aux.results[11, 6]), 
        c(NA, noaux.results[11, 6]/direct.results[11, 6], aux.results[11, 6]/direct.results[11, 6]) 
        )

)



# See script aux-fig-3-fig-5-simulations.R for these results
finished <- read.csv("rrreg-fig-1.csv")

noaux.results <- rbind(colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1000 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 2500 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1e04 & case == "subgroup")),

      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1000 & case == "corr1")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 2500 & case == "corr1")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1e04 & case == "corr1")),

      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1000 & case == "corr2")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 2500 & case == "corr2")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1e04 & case == "corr2")),

      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1000 & case == "corr3")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 2500 & case == "corr3")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 0 & n == 1e04 & case == "corr3")))


aux.results <- rbind(colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1000 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 2500 & case == "subgroup")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1e04 & case == "subgroup")),

      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1000 & case == "corr1")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 2500 & case == "corr1")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1e04 & case == "corr1")),

      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1000 & case == "corr2")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 2500 & case == "corr2")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1e04 & case == "corr2")),

      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1000 & case == "corr3")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 2500 & case == "corr3")),
      colMeans(subset(finished, select = c(1:6), subset = augmented == 1 & n == 1e04 & case == "corr3")))



eff.table.rr <- rbind(

    # scenario 1: n = 2500
        cbind(
    c(direct.results[2, 4], noaux.results[2, 4], aux.results[2, 4]), 
        c(NA, noaux.results[2, 4]/direct.results[2, 4], aux.results[2, 4]/direct.results[2, 4]), 

        c(direct.results[2, 5], noaux.results[2, 5], aux.results[2, 5]), 
        c(NA, noaux.results[2, 5]/direct.results[2, 5], aux.results[2, 5]/direct.results[2, 5]), 
        
        c(direct.results[2, 6], noaux.results[2, 6], aux.results[2, 6]), 
        c(NA, noaux.results[2, 6]/direct.results[2, 6], aux.results[2, 6]/direct.results[2, 6]) 
        ),
    
    # scenario 2: n = 2500
        cbind(
    c(direct.results[5, 4], noaux.results[5, 4], aux.results[5, 4]), 
        c(NA, noaux.results[5, 4]/direct.results[5, 4], aux.results[5, 4]/direct.results[5, 4]), 

        c(direct.results[5, 5], noaux.results[5, 5], aux.results[5, 5]), 
        c(NA, noaux.results[5, 5]/direct.results[5, 5], aux.results[5, 5]/direct.results[5, 5]), 
        
        c(direct.results[5, 6], noaux.results[5, 6], aux.results[5, 6]), 
        c(NA, noaux.results[5, 6]/direct.results[5, 6], aux.results[5, 6]/direct.results[5, 6]) 
        ),

        # scenario 3: n = 2500
        cbind(
    c(direct.results[8, 4], noaux.results[8, 4], aux.results[8, 4]), 
        c(NA, noaux.results[8, 4]/direct.results[8, 4], aux.results[8, 4]/direct.results[8, 4]), 

        c(direct.results[8, 5], noaux.results[8, 5], aux.results[8, 5]), 
        c(NA, noaux.results[8, 5]/direct.results[8, 5], aux.results[8, 5]/direct.results[8, 5]), 
        
        c(direct.results[8, 6], noaux.results[8, 6], aux.results[8, 6]), 
        c(NA, noaux.results[8, 6]/direct.results[8, 6], aux.results[8, 6]/direct.results[8, 6]) 
        ),

        # scenario 4: n = 2500
        cbind(
    c(direct.results[11, 4], noaux.results[11, 4], aux.results[11, 4]), 
        c(NA, noaux.results[11, 4]/direct.results[11, 4], aux.results[11, 4]/direct.results[11, 4]), 

        c(direct.results[11, 5], noaux.results[11, 5], aux.results[11, 5]), 
        c(NA, noaux.results[11, 5]/direct.results[11, 5], aux.results[11, 5]/direct.results[11, 5]), 
        
        c(direct.results[11, 6], noaux.results[11, 6], aux.results[11, 6]), 
        c(NA, noaux.results[11, 6]/direct.results[11, 6], aux.results[11, 6]/direct.results[11, 6]) 
        )

)

xtable(eff.table.rr)

mean(c(
	eff.table.list[2, 4] - eff.table.list[3, 4],
	eff.table.list[5, 4] - eff.table.list[6, 4],
	eff.table.list[8, 4] - eff.table.list[9, 4],
	eff.table.list[11, 4] - eff.table.list[12, 4],

    eff.table.list[2, 6] - eff.table.list[3, 6],
	eff.table.list[5, 6] - eff.table.list[6, 6],
	eff.table.list[8, 6] - eff.table.list[9, 6],
	eff.table.list[11, 6] - eff.table.list[12, 6]
	)
)

mean(
    c(eff.table.rr[2, 4] - eff.table.rr[3, 4],
	eff.table.rr[5, 4] - eff.table.rr[6, 4],
	eff.table.rr[8, 4] - eff.table.rr[9, 4],
	eff.table.rr[11, 4] - eff.table.rr[12, 4],

    eff.table.rr[2, 6] - eff.table.rr[3, 6],
	eff.table.rr[5, 6] - eff.table.rr[6, 6],
	eff.table.rr[8, 6] - eff.table.rr[9, 6],
	eff.table.rr[11, 6] - eff.table.rr[12, 6])
	)
